import functools
import hashlib
import json
import re
import time

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from flask import jsonify, redirect, session
from lxml import etree


def md5(code):
  """Return the hexadecimal MD5 digest of *code* (a str, encoded as UTF-8)."""
  return hashlib.md5(code.encode("utf-8")).hexdigest()

def msg_json(code, msg, data=''):
  """Build a Flask JSON response carrying the standard {code, msg, data} envelope."""
  payload = {'code': code, 'msg': msg, 'data': data}
  return jsonify(payload)

def msg_json_js(code, msg, data=''):
  """Serialize the standard {code, msg, data} envelope to a JSON string."""
  payload = {'code': code, 'msg': msg, 'data': data}
  return json.dumps(payload)


def is_login(func):
  """Decorator: redirect to the login page unless a user is in the session.

  BUG FIX: without functools.wraps every decorated view is named 'inner',
  which makes Flask's endpoint names collide when the decorator is applied
  to more than one route (and ruins tracebacks/introspection).
  """
  @functools.wraps(func)
  def inner(*args, **kwargs):
    # session['user'] is only set after a successful login
    if not session.get('user'):
      return redirect('login')
    return func(*args, **kwargs)
  return inner

def is_url(url):
  """Return True when *url* starts with an explicit http:// or https:// scheme.

  BUG FIX: the old pattern made the scheme optional ('(https|http)?://'),
  so malformed strings like '://example.com' were accepted.
  """
  return re.match(r'^https?://\S+', url) is not None
# def saveCookiesForUser(cookie):
#   ck_dict = requests.utils.dict_from_cookiejar(cookie)
#   with open('.cookie', 'w') as f:
#     ck_dict = str(ck_dict).replace("'", '"')
#     f.write(ck_dict)
#     f.close()

# def loadCookiesForUser():
#     with open('.cookie','r') as fp1:
#         load_cookies = json.load(fp1)
#     return load_cookies

def get_list(url, rex, type=1, expt=None, charset='utf-8'):
  """Fetch *url* and extract a list of links using one of three strategies.

  type=1: regex extraction via get_list_re (expt is the inner pattern),
  type=2: CSS-selector extraction via get_list_class,
  type=3: XPath extraction via get_list_xpath.
  Returns None on any error or unknown *type*.

  Fixes: str(UserAgent()) is the object repr, not a user-agent string —
  ua.random yields a real one; requests.get now has a timeout so a dead
  server cannot hang the caller forever.
  """
  try:
    ua = UserAgent()
    headers = {
      'User-Agent': ua.random  # was str(ua): sent the repr, not a UA string
    }
    # timeout added: previously this could block indefinitely
    res = requests.get(url, headers=headers, timeout=15)
    content = res.content.decode(charset)
    if type == 1:
      return get_list_re(content, rex, expt)
    elif type == 2:
      return get_list_class(content, rex)
    elif type == 3:
      return get_list_xpath(content, rex)
    else:
      return None
  except Exception as e:
    # best-effort scraper: log and signal failure with None (file convention)
    print(e)
    return None


def get_list_re(content, reurlrex, reurlrex1=None):
  """Locate the block matched by *reurlrex*, then pull link targets out of it.

  *reurlrex1* (defaults to an href-attribute pattern) is applied to the first
  block match; duplicates are dropped. Returns None when nothing matches or
  a pattern is invalid.
  """
  try:
    if not reurlrex1:
      reurlrex1 = 'href=[\'\"](.*?)[\'\"]'
    blocks = re.findall(reurlrex, content)
    links = re.findall(reurlrex1, blocks[0])
    return list(set(links))
  except Exception:
    return None


def get_list_class(content, class_):
  """Return the de-duplicated href attributes of all elements matching the
  CSS selector *class_*, or None on any parsing/lookup error."""
  try:
    soup = BeautifulSoup(content, 'lxml')
    hrefs = [tag['href'] for tag in soup.select(class_)]
    return list(set(hrefs))
  except Exception:
    return None


def get_list_xpath(content, xpath):
  """Evaluate *xpath* against the parsed HTML *content*; None on any error."""
  try:
    return etree.HTML(content).xpath(xpath)
  except Exception:
    return None

def musthas(urllist, value):
  """Keep only the entries of *urllist* that contain the substring *value*."""
  return [item for item in urllist if value in item]

def nothas(urllist, value):
  """Keep only the entries of *urllist* that do NOT contain the substring *value*."""
  return [item for item in urllist if value not in item]

## Strip tags from HTML
# Removes markup and related noise from an HTML string.
# @param htmlstr the HTML string.
def filter_tags(htmlstr, seft="######"):
  """Strip HTML markup from *htmlstr* and return the surviving text,
  with non-empty lines joined by the separator *seft*.

  NOTE(review): all literal spaces are removed, which assumes CJK content
  where spaces carry no meaning — confirm before using on Latin text.

  Fixes over the previous version:
  - blank-line collapsing used the pattern 'n+' -> 'n', which mangled any
    word containing a doubled letter n; it now collapses real newlines;
  - the CDATA/script/style patterns used bare 's' and unescaped brackets
    where '\\s', '\\[' and '\\]' were intended, so they missed their targets.
  """
  re_cdata = re.compile(r'//<!\[CDATA\[[^>]*//\]\]>', re.I)  # CDATA sections
  re_script = re.compile(r'<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I)  # <script> blocks
  re_style = re.compile(r'<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I)  # <style> blocks
  re_br = re.compile(r'<br.?/>')  # self-closing line breaks -> newline
  re_h = re.compile(r'</?[\w\W^>]*?>')  # any remaining HTML tag
  re_comment = re.compile(r'<!--[^>]*-->')  # HTML comments
  s = re_cdata.sub('', htmlstr)
  # paragraph/heading closers become line breaks before tags are stripped
  s = s.replace("</p>", "\n")
  s = s.replace("</h3>", "\n")
  s = re_br.sub('\n', s)
  s = re_script.sub('', s)
  s = re_style.sub('', s)
  s = re_h.sub('', s)
  s = re_comment.sub('', s)
  # collapse runs of newlines (previously 'n+' -> 'n', a bug)
  re_blank = re.compile(r'\n+')
  s = re_blank.sub('\n', s)
  s = replaceCharEntity(s)  # decode &lt;-style entities
  s = s.replace("\t", "")
  s = s.replace("\r", "")
  s = s.replace(" ", "")
  s = s.replace("&ldquo;", "")
  s = s.replace("&rdquo;", "")
  s = s.replace("&nbsp;", "")
  s = s.replace("&mdash;", "")
  s = s.replace("&amp;", "")
  # drop empty lines and join the remainder with the separator
  tem = ''
  for line in s.splitlines():
    if line:
      tem = tem + line.rstrip() + seft
  return tem

# ifgjgu
#
def replace_ifg(data):
  """Scrub mangled '& gt;'-style fragments from *data*, then turn the
  leftover '/ p' markers into <br /> tags."""
  removals = (
    "/ p & gt",
    "& gt; p & gt;",
    "& gt;",
    "& lt",
    "(p & gt",
    "; p & gt",
  )
  cleaned = data
  for fragment in removals:
    cleaned = cleaned.replace(fragment, "")
  return cleaned.replace("/ p", "<br />")

## Replace common HTML character entities.
# Substitutes normal characters for the special character entities in HTML.
# You can add new entities to CHAR_ENTITIES to handle more of them.
# @param htmlstr the HTML string.
def replaceCharEntity(htmlstr):
  """Replace common HTML character entities with their literal characters.

  Named and numeric forms (e.g. &lt; and &#60;) listed in CHAR_ENTITIES are
  substituted; unknown entities are deleted. Extend CHAR_ENTITIES to support
  more entities.

  BUG FIX: the pattern previously used 'w+' (a literal letter w) instead of
  '\\w+', so real entities such as &gt; were never matched at all.
  """
  CHAR_ENTITIES = {'nbsp': ' ', '160': ' ',
                   'lt': '<', '60': '<',
                   'gt': '>', '62': '>',
                   'amp': '&', '38': '&',
                   'quot': '"', '34': '"', }

  re_charEntity = re.compile(r'&#?(?P<name>\w+);')
  sz = re_charEntity.search(htmlstr)
  while sz:
    key = sz.group('name')  # entity name without '&'/'#'/';', e.g. 'gt'
    # known entities map to their character; unknown ones are dropped
    replacement = CHAR_ENTITIES.get(key, '')
    # replace only the current (first) occurrence, then rescan
    htmlstr = re_charEntity.sub(replacement, htmlstr, 1)
    sz = re_charEntity.search(htmlstr)
  return htmlstr


# Wrap each line in a paragraph tag
def outhtml(str1):
  """Wrap *str1* in a <p> tag terminated by CRLF."""
  return f"<p>{str1}</p>\r\n"


def getifgstatus(d):
  """Return True when *d* equals today's local date as an int in YYYYMMDD form."""
  today = int(time.strftime("%Y%m%d", time.localtime()))
  return d == today

def getpagecount(perpage, maxcount):
  """Return the number of pages needed to show *maxcount* items at
  *perpage* items per page (ceiling division; 0 items -> 0 pages).

  BUG FIX: int(maxcount / perpage) went through float division, which is
  off by one for very large counts; integer ceiling division is exact.
  """
  return (maxcount + perpage - 1) // perpage